{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a9bff53e",
   "metadata": {},
   "source": [
    "# Lesson 1: Advanced RAG Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "feb98470-c136-471d-a63e-d50d8eb09c57",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "\n",
    "import os\n",
    "import openai\n",
    "openai.api_key = utils.get_openai_api_key()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31e2859b-596e-40b3-867b-f4d6e91f74bc",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index import SimpleDirectoryReader\n",
    "\n",
    "documents = SimpleDirectoryReader(\n",
    "    input_files=[\"./eBook-How-to-Build-a-Career-in-AI.pdf\"]\n",
    ").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d7d0857-b9d1-4feb-8243-bfd2f4953acd",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(type(documents), \"\\n\")\n",
    "print(len(documents), \"\\n\")\n",
    "print(type(documents[0]))\n",
    "print(documents[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3123d3d",
   "metadata": {},
   "source": [
    "## Basic RAG pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4abc806-64f5-46bb-8c9f-6469ecb18d20",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index import Document\n",
    "\n",
    "document = Document(text=\"\\n\\n\".join([doc.text for doc in documents]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "afc2baff-5e8b-4733-9899-16f248777b23",
   "metadata": {
    "height": 183,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index import VectorStoreIndex\n",
    "from llama_index import ServiceContext\n",
    "from llama_index.llms import OpenAI\n",
    "\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.1)\n",
    "service_context = ServiceContext.from_defaults(\n",
    "    llm=llm, embed_model=\"local:BAAI/bge-small-en-v1.5\"\n",
    ")\n",
    "index = VectorStoreIndex.from_documents([document],\n",
    "                                        service_context=service_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae52a26c-7d0c-44df-8043-4c7f19f794b9",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b0d5b6e-cc2e-4648-b28c-5fa25a97d175",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = query_engine.query(\n",
    "    \"What are steps to take when finding projects to build your experience?\"\n",
    ")\n",
    "print(str(response))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e50a1ac5",
   "metadata": {},
   "source": [
    "## Evaluation setup using TruLens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ead7dc1-71b2-4001-918f-bf8d610fd3fd",
   "metadata": {
    "height": 132,
    "tags": []
   },
   "outputs": [],
   "source": [
    "eval_questions = []\n",
    "with open('eval_questions.txt', 'r') as file:\n",
    "    for line in file:\n",
    "        # Remove newline character and convert to integer\n",
    "        item = line.strip()\n",
    "        print(item)\n",
    "        eval_questions.append(item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "87a278f8",
   "metadata": {
    "height": 64
   },
   "outputs": [],
   "source": [
    "# You can try your own question:\n",
    "new_question = \"What is the right AI job for me?\"\n",
    "eval_questions.append(new_question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d5204e8",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "print(eval_questions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c063c9c7-bf1e-4b24-9a22-d4281c0f954e",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from trulens_eval import Tru\n",
    "tru = Tru()\n",
    "\n",
    "tru.reset_database()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "03802825-6ce4-4563-aeec-d8d57e095ad1",
   "metadata": {},
   "source": [
    "For the classroom, we've written some of the code in helper functions inside a utils.py file.  \n",
    "- You can view the utils.py file in the file directory by clicking on the \"Jupyter\" logo at the top of the notebook.\n",
    "- In later lessons, you'll get to work directly with the code that's currently wrapped inside these helper functions, to give you more options to customize your RAG pipeline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f754bed-d16f-4c8d-a1a1-b36096272570",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import get_prebuilt_trulens_recorder\n",
    "\n",
    "tru_recorder = get_prebuilt_trulens_recorder(query_engine,\n",
    "                                             app_id=\"Direct Query Engine\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4dbdfbcc-aac7-4805-9894-4fc016c66bf6",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "with tru_recorder as recording:\n",
    "    for question in eval_questions:\n",
    "        response = query_engine.query(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e14f512b-601c-42d0-bfac-bf41d9c577e7",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "records, feedback = tru.get_records_and_feedback(app_ids=[])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2da4a602-0d56-4bf8-9fa6-03ef0b7e254b",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "records.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64310897-179b-4081-aab8-f08a3392a078",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# launches on http://localhost:8501/\n",
    "tru.run_dashboard()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a4eedcef",
   "metadata": {},
   "source": [
    "## Advanced RAG pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9a17ea2b",
   "metadata": {},
   "source": [
    "### 1. Sentence Window retrieval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dae4a668-3699-4750-82f7-e53ae1bca3a7",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.llms import OpenAI\n",
    "\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78f7678f-358d-448d-b153-11ac8e96a7fc",
   "metadata": {
    "height": 149,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import build_sentence_window_index\n",
    "\n",
    "sentence_index = build_sentence_window_index(\n",
    "    document,\n",
    "    llm,\n",
    "    embed_model=\"local:BAAI/bge-small-en-v1.5\",\n",
    "    save_dir=\"sentence_index\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72f904c3-9845-4df5-9d2e-e5115160f987",
   "metadata": {
    "height": 64,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import get_sentence_window_query_engine\n",
    "\n",
    "sentence_window_engine = get_sentence_window_query_engine(sentence_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f59e2314-7cac-42f4-a552-9a8e4db641eb",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "window_response = sentence_window_engine.query(\n",
    "    \"how do I get started on a personal project in AI?\"\n",
    ")\n",
    "print(str(window_response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5c10917-8846-4e73-838d-6232c906a7db",
   "metadata": {
    "height": 115,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tru.reset_database()\n",
    "\n",
    "tru_recorder_sentence_window = get_prebuilt_trulens_recorder(\n",
    "    sentence_window_engine,\n",
    "    app_id = \"Sentence Window Query Engine\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11710e67-aba8-479e-8585-c4c611e2c1d2",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for question in eval_questions:\n",
    "    with tru_recorder_sentence_window as recording:\n",
    "        response = sentence_window_engine.query(question)\n",
    "        print(question)\n",
    "        print(str(response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "869d1e55-729b-45f2-a0f9-773c49d4616f",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tru.get_leaderboard(app_ids=[])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b92d0f2-2e80-48d5-92af-b3655eb03ea2",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# launches on http://localhost:8501/\n",
    "tru.run_dashboard()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a92e2c55",
   "metadata": {},
   "source": [
    "### 2. Auto-merging retrieval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "558c639b-31eb-4c34-b6c4-fe6ae5717733",
   "metadata": {
    "height": 149,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import build_automerging_index\n",
    "\n",
    "automerging_index = build_automerging_index(\n",
    "    documents,\n",
    "    llm,\n",
    "    embed_model=\"local:BAAI/bge-small-en-v1.5\",\n",
    "    save_dir=\"merging_index\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b32265f2-0247-42df-9abe-97d52f69edcf",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from utils import get_automerging_query_engine\n",
    "\n",
    "automerging_query_engine = get_automerging_query_engine(\n",
    "    automerging_index,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "918ed568-220e-4c7c-aa60-cfa58ef1fcbd",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "auto_merging_response = automerging_query_engine.query(\n",
    "    \"How do I build a portfolio of AI projects?\"\n",
    ")\n",
    "print(str(auto_merging_response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db4f18a9-7b8a-4ae2-ab11-3a6a941a5afc",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tru.reset_database()\n",
    "\n",
    "tru_recorder_automerging = get_prebuilt_trulens_recorder(automerging_query_engine,\n",
    "                                                         app_id=\"Automerging Query Engine\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99cc2cfe-7096-4fa0-aa72-094bebac35a3",
   "metadata": {
    "height": 98,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for question in eval_questions:\n",
    "    with tru_recorder_automerging as recording:\n",
    "        response = automerging_query_engine.query(question)\n",
    "        print(question)\n",
    "        print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5404dec1-60ca-42fa-ac13-793a5423aa64",
   "metadata": {
    "height": 30,
    "tags": []
   },
   "outputs": [],
   "source": [
    "tru.get_leaderboard(app_ids=[])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f545d41-0d98-446f-8214-8b59bef08d6c",
   "metadata": {
    "height": 47,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# launches on http://localhost:8501/\n",
    "tru.run_dashboard()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
